@@ -32,6 +32,8 @@ module Agents |
||
32 | 32 |
|
33 | 33 |
Note that for all of the formats, whatever you extract MUST have the same number of matches for each extractor. E.g., if you're extracting rows, all extractors must match all rows. For generating CSS selectors, something like [SelectorGadget](http://selectorgadget.com) may be helpful. |
34 | 34 |
|
35 |
+ The Agent can be configured to use HTTP basic auth by setting the `basic_auth` option to a string of the form `username:password`. |
|
36 |
+ |
|
35 | 37 |
Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent. |
36 | 38 |
MD |
37 | 39 |
|
@@ -70,7 +72,11 @@ module Agents |
||
70 | 72 |
def check |
71 | 73 |
hydra = Typhoeus::Hydra.new |
72 | 74 |
log "Fetching #{options['url']}" |
73 |
- request = Typhoeus::Request.new(options['url'], :followlocation => true) |
|
75 |
+ request_opts = {:followlocation => true} |
|
76 |
+ if !options['basic_auth'].blank? |
|
77 |
+ request_opts[:userpwd] = options['basic_auth'] |
|
78 |
+ end |
|
79 |
+ request = Typhoeus::Request.new(options['url'], request_opts) |
|
74 | 80 |
request.on_failure do |response| |
75 | 81 |
error "Failed: #{response.inspect}" |
76 | 82 |
end |
@@ -1,189 +1,220 @@ |
||
1 | 1 |
require 'spec_helper' |
2 | 2 |
|
3 | 3 |
describe Agents::WebsiteAgent do |
4 |
- before do |
|
5 |
- stub_request(:any, /xkcd/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200) |
|
6 |
- @site = { |
|
7 |
- 'name' => "XKCD", |
|
8 |
- 'expected_update_period_in_days' => 2, |
|
9 |
- 'type' => "html", |
|
10 |
- 'url' => "http://xkcd.com", |
|
11 |
- 'mode' => 'on_change', |
|
12 |
- 'extract' => { |
|
13 |
- 'url' => {'css' => "#comic img", 'attr' => "src"}, |
|
14 |
- 'title' => {'css' => "#comic img", 'attr' => "title"} |
|
4 |
+ describe "checking without basic auth" do |
|
5 |
+ before do |
|
6 |
+ stub_request(:any, /xkcd/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200) |
|
7 |
+ @site = { |
|
8 |
+ 'name' => "XKCD", |
|
9 |
+ 'expected_update_period_in_days' => 2, |
|
10 |
+ 'type' => "html", |
|
11 |
+ 'url' => "http://xkcd.com", |
|
12 |
+ 'mode' => 'on_change', |
|
13 |
+ 'extract' => { |
|
14 |
+ 'url' => {'css' => "#comic img", 'attr' => "src"}, |
|
15 |
+ 'title' => {'css' => "#comic img", 'attr' => "title"} |
|
16 |
+ } |
|
15 | 17 |
} |
16 |
- } |
|
17 |
- @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @site) |
|
18 |
- @checker.user = users(:bob) |
|
19 |
- @checker.save! |
|
20 |
- end |
|
21 |
- |
|
22 |
- describe "#check" do |
|
23 |
- it "should check for changes" do |
|
24 |
- lambda { @checker.check }.should change { Event.count }.by(1) |
|
25 |
- lambda { @checker.check }.should_not change { Event.count } |
|
18 |
+ @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @site) |
|
19 |
+ @checker.user = users(:bob) |
|
20 |
+ @checker.save! |
|
26 | 21 |
end |
27 | 22 |
|
28 |
- it "should always save events when in :all mode" do |
|
29 |
- lambda { |
|
30 |
- @site['mode'] = 'all' |
|
23 |
+ describe "#check" do |
|
24 |
+ it "should check for changes" do |
|
25 |
+ lambda { @checker.check }.should change { Event.count }.by(1) |
|
26 |
+ lambda { @checker.check }.should_not change { Event.count } |
|
27 |
+ end |
|
28 |
+ |
|
29 |
+ it "should always save events when in :all mode" do |
|
30 |
+ lambda { |
|
31 |
+ @site['mode'] = 'all' |
|
32 |
+ @checker.options = @site |
|
33 |
+ @checker.check |
|
34 |
+ @checker.check |
|
35 |
+ }.should change { Event.count }.by(2) |
|
36 |
+ end |
|
37 |
+ |
|
38 |
+ it "should log an error if the number of results for a set of extraction patterns differs" do |
|
39 |
+ @site['extract']['url']['css'] = "div" |
|
31 | 40 |
@checker.options = @site |
32 | 41 |
@checker.check |
33 |
- @checker.check |
|
34 |
- }.should change { Event.count }.by(2) |
|
35 |
- end |
|
36 |
- |
|
37 |
- it "should log an error if the number of results for a set of extraction patterns differs" do |
|
38 |
- @site['extract']['url']['css'] = "div" |
|
39 |
- @checker.options = @site |
|
40 |
- @checker.check |
|
41 |
- @checker.logs.first.message.should =~ /Got an uneven number of matches/ |
|
42 |
+ @checker.logs.first.message.should =~ /Got an uneven number of matches/ |
|
43 |
+ end |
|
42 | 44 |
end |
43 |
- end |
|
44 | 45 |
|
45 |
- describe '#working?' do |
|
46 |
- it 'checks if events have been received within the expected receive period' do |
|
47 |
- stubbed_time = Time.now |
|
48 |
- stub(Time).now { stubbed_time } |
|
46 |
+ describe '#working?' do |
|
47 |
+ it 'checks if events have been received within the expected receive period' do |
|
48 |
+ stubbed_time = Time.now |
|
49 |
+ stub(Time).now { stubbed_time } |
|
49 | 50 |
|
50 |
- @checker.should_not be_working # No events created |
|
51 |
- @checker.check |
|
52 |
- @checker.reload.should be_working # Just created events |
|
53 |
- |
|
54 |
- @checker.error "oh no!" |
|
55 |
- @checker.reload.should_not be_working # There is a recent error |
|
51 |
+ @checker.should_not be_working # No events created |
|
52 |
+ @checker.check |
|
53 |
+ @checker.reload.should be_working # Just created events |
|
56 | 54 |
|
57 |
- stubbed_time = 20.minutes.from_now |
|
58 |
- @checker.events.delete_all |
|
59 |
- @checker.check |
|
60 |
- @checker.reload.should be_working # There is a newer event now |
|
55 |
+ @checker.error "oh no!" |
|
56 |
+ @checker.reload.should_not be_working # There is a recent error |
|
61 | 57 |
|
62 |
- stubbed_time = 2.days.from_now |
|
63 |
- @checker.reload.should_not be_working # Two days have passed without a new event having been created |
|
64 |
- end |
|
65 |
- end |
|
58 |
+ stubbed_time = 20.minutes.from_now |
|
59 |
+ @checker.events.delete_all |
|
60 |
+ @checker.check |
|
61 |
+ @checker.reload.should be_working # There is a newer event now |
|
66 | 62 |
|
67 |
- describe "parsing" do |
|
68 |
- it "parses CSS" do |
|
69 |
- @checker.check |
|
70 |
- event = Event.last |
|
71 |
- event.payload['url'].should == "http://imgs.xkcd.com/comics/evolving.png" |
|
72 |
- event.payload['title'].should =~ /^Biologists play reverse/ |
|
63 |
+ stubbed_time = 2.days.from_now |
|
64 |
+ @checker.reload.should_not be_working # Two days have passed without a new event having been created |
|
65 |
+ end |
|
73 | 66 |
end |
74 | 67 |
|
75 |
- it "should turn relative urls to absolute" do |
|
76 |
- rel_site = { |
|
77 |
- 'name' => "XKCD", |
|
78 |
- 'expected_update_period_in_days' => 2, |
|
79 |
- 'type' => "html", |
|
80 |
- 'url' => "http://xkcd.com", |
|
81 |
- 'mode' => :on_change, |
|
82 |
- 'extract' => { |
|
83 |
- 'url' => {'css' => "#topLeft a", 'attr' => "href"}, |
|
84 |
- 'title' => {'css' => "#topLeft a", 'text' => "true"} |
|
85 |
- } |
|
86 |
- } |
|
87 |
- rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site) |
|
88 |
- rel.user = users(:bob) |
|
89 |
- rel.save! |
|
90 |
- rel.check |
|
91 |
- event = Event.last |
|
92 |
- event.payload['url'].should == "http://xkcd.com/about" |
|
93 |
- end |
|
68 |
+ describe "parsing" do |
|
69 |
+ it "parses CSS" do |
|
70 |
+ @checker.check |
|
71 |
+ event = Event.last |
|
72 |
+ event.payload['url'].should == "http://imgs.xkcd.com/comics/evolving.png" |
|
73 |
+ event.payload['title'].should =~ /^Biologists play reverse/ |
|
74 |
+ end |
|
94 | 75 |
|
95 |
- describe "JSON" do |
|
96 |
- it "works with paths" do |
|
97 |
- json = { |
|
98 |
- 'response' => { |
|
99 |
- 'version' => 2, |
|
100 |
- 'title' => "hello!" |
|
101 |
- } |
|
102 |
- } |
|
103 |
- stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200) |
|
104 |
- site = { |
|
105 |
- 'name' => "Some JSON Response", |
|
76 |
+ it "should turn relative urls to absolute" do |
|
77 |
+ rel_site = { |
|
78 |
+ 'name' => "XKCD", |
|
106 | 79 |
'expected_update_period_in_days' => 2, |
107 |
- 'type' => "json", |
|
108 |
- 'url' => "http://json-site.com", |
|
109 |
- 'mode' => 'on_change', |
|
80 |
+ 'type' => "html", |
|
81 |
+ 'url' => "http://xkcd.com", |
|
82 |
+ 'mode' => :on_change, |
|
110 | 83 |
'extract' => { |
111 |
- 'version' => {'path' => "response.version"}, |
|
112 |
- 'title' => {'path' => "response.title"} |
|
84 |
+ 'url' => {'css' => "#topLeft a", 'attr' => "href"}, |
|
85 |
+ 'title' => {'css' => "#topLeft a", 'text' => "true"} |
|
113 | 86 |
} |
114 | 87 |
} |
115 |
- checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site) |
|
116 |
- checker.user = users(:bob) |
|
117 |
- checker.save! |
|
118 |
- |
|
119 |
- checker.check |
|
88 |
+ rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site) |
|
89 |
+ rel.user = users(:bob) |
|
90 |
+ rel.save! |
|
91 |
+ rel.check |
|
120 | 92 |
event = Event.last |
121 |
- event.payload['version'].should == 2 |
|
122 |
- event.payload['title'].should == "hello!" |
|
93 |
+ event.payload['url'].should == "http://xkcd.com/about" |
|
123 | 94 |
end |
124 | 95 |
|
125 |
- it "can handle arrays" do |
|
126 |
- json = { |
|
127 |
- 'response' => { |
|
128 |
- 'data' => [ |
|
129 |
- {'title' => "first", 'version' => 2}, |
|
130 |
- {'title' => "second", 'version' => 2.5} |
|
131 |
- ] |
|
96 |
+ describe "JSON" do |
|
97 |
+ it "works with paths" do |
|
98 |
+ json = { |
|
99 |
+ 'response' => { |
|
100 |
+ 'version' => 2, |
|
101 |
+ 'title' => "hello!" |
|
102 |
+ } |
|
132 | 103 |
} |
133 |
- } |
|
134 |
- stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200) |
|
135 |
- site = { |
|
136 |
- 'name' => "Some JSON Response", |
|
137 |
- 'expected_update_period_in_days' => 2, |
|
138 |
- 'type' => "json", |
|
139 |
- 'url' => "http://json-site.com", |
|
140 |
- 'mode' => 'on_change', |
|
141 |
- 'extract' => { |
|
142 |
- :title => {'path' => "response.data[*].title"}, |
|
143 |
- :version => {'path' => "response.data[*].version"} |
|
104 |
+ stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200) |
|
105 |
+ site = { |
|
106 |
+ 'name' => "Some JSON Response", |
|
107 |
+ 'expected_update_period_in_days' => 2, |
|
108 |
+ 'type' => "json", |
|
109 |
+ 'url' => "http://json-site.com", |
|
110 |
+ 'mode' => 'on_change', |
|
111 |
+ 'extract' => { |
|
112 |
+ 'version' => {'path' => "response.version"}, |
|
113 |
+ 'title' => {'path' => "response.title"} |
|
114 |
+ } |
|
144 | 115 |
} |
145 |
- } |
|
146 |
- checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site) |
|
147 |
- checker.user = users(:bob) |
|
148 |
- checker.save! |
|
116 |
+ checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site) |
|
117 |
+ checker.user = users(:bob) |
|
118 |
+ checker.save! |
|
149 | 119 |
|
150 |
- lambda { |
|
151 | 120 |
checker.check |
152 |
- }.should change { Event.count }.by(2) |
|
153 |
- |
|
154 |
- event = Event.all[-1] |
|
155 |
- event.payload['version'].should == 2.5 |
|
156 |
- event.payload['title'].should == "second" |
|
121 |
+ event = Event.last |
|
122 |
+ event.payload['version'].should == 2 |
|
123 |
+ event.payload['title'].should == "hello!" |
|
124 |
+ end |
|
125 |
+ |
|
126 |
+ it "can handle arrays" do |
|
127 |
+ json = { |
|
128 |
+ 'response' => { |
|
129 |
+ 'data' => [ |
|
130 |
+ {'title' => "first", 'version' => 2}, |
|
131 |
+ {'title' => "second", 'version' => 2.5} |
|
132 |
+ ] |
|
133 |
+ } |
|
134 |
+ } |
|
135 |
+ stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200) |
|
136 |
+ site = { |
|
137 |
+ 'name' => "Some JSON Response", |
|
138 |
+ 'expected_update_period_in_days' => 2, |
|
139 |
+ 'type' => "json", |
|
140 |
+ 'url' => "http://json-site.com", |
|
141 |
+ 'mode' => 'on_change', |
|
142 |
+ 'extract' => { |
|
143 |
+ :title => {'path' => "response.data[*].title"}, |
|
144 |
+ :version => {'path' => "response.data[*].version"} |
|
145 |
+ } |
|
146 |
+ } |
|
147 |
+ checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site) |
|
148 |
+ checker.user = users(:bob) |
|
149 |
+ checker.save! |
|
150 |
+ |
|
151 |
+ lambda { |
|
152 |
+ checker.check |
|
153 |
+ }.should change { Event.count }.by(2) |
|
154 |
+ |
|
155 |
+ event = Event.all[-1] |
|
156 |
+ event.payload['version'].should == 2.5 |
|
157 |
+ event.payload['title'].should == "second" |
|
158 |
+ |
|
159 |
+ event = Event.all[-2] |
|
160 |
+ event.payload['version'].should == 2 |
|
161 |
+ event.payload['title'].should == "first" |
|
162 |
+ end |
|
163 |
+ |
|
164 |
+ it "stores the whole object if :extract is not specified" do |
|
165 |
+ json = { |
|
166 |
+ 'response' => { |
|
167 |
+ 'version' => 2, |
|
168 |
+ 'title' => "hello!" |
|
169 |
+ } |
|
170 |
+ } |
|
171 |
+ stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200) |
|
172 |
+ site = { |
|
173 |
+ 'name' => "Some JSON Response", |
|
174 |
+ 'expected_update_period_in_days' => 2, |
|
175 |
+ 'type' => "json", |
|
176 |
+ 'url' => "http://json-site.com", |
|
177 |
+ 'mode' => 'on_change' |
|
178 |
+ } |
|
179 |
+ checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site) |
|
180 |
+ checker.user = users(:bob) |
|
181 |
+ checker.save! |
|
157 | 182 |
|
158 |
- event = Event.all[-2] |
|
159 |
- event.payload['version'].should == 2 |
|
160 |
- event.payload['title'].should == "first" |
|
183 |
+ checker.check |
|
184 |
+ event = Event.last |
|
185 |
+ event.payload['response']['version'].should == 2 |
|
186 |
+ event.payload['response']['title'].should == "hello!" |
|
187 |
+ end |
|
161 | 188 |
end |
189 |
+ end |
|
190 |
+ end |
|
162 | 191 |
|
163 |
- it "stores the whole object if :extract is not specified" do |
|
164 |
- json = { |
|
165 |
- 'response' => { |
|
166 |
- 'version' => 2, |
|
167 |
- 'title' => "hello!" |
|
168 |
- } |
|
169 |
- } |
|
170 |
- stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200) |
|
171 |
- site = { |
|
172 |
- 'name' => "Some JSON Response", |
|
173 |
- 'expected_update_period_in_days' => 2, |
|
174 |
- 'type' => "json", |
|
175 |
- 'url' => "http://json-site.com", |
|
176 |
- 'mode' => 'on_change' |
|
177 |
- } |
|
178 |
- checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site) |
|
179 |
- checker.user = users(:bob) |
|
180 |
- checker.save! |
|
192 |
+ describe "checking with http basic auth" do |
|
193 |
+ before do |
|
194 |
+ stub_request(:any, /user:pass/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200) |
|
195 |
+ @site = { |
|
196 |
+ 'name' => "XKCD", |
|
197 |
+ 'expected_update_period_in_days' => 2, |
|
198 |
+ 'type' => "html", |
|
199 |
+ 'url' => "http://www.example.com", |
|
200 |
+ 'mode' => 'on_change', |
|
201 |
+ 'extract' => { |
|
202 |
+ 'url' => {'css' => "#comic img", 'attr' => "src"}, |
|
203 |
+ 'title' => {'css' => "#comic img", 'attr' => "title"} |
|
204 |
+ }, |
|
205 |
+ 'basic_auth' => "user:pass" |
|
206 |
+ } |
|
207 |
+ @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @site) |
|
208 |
+ @checker.user = users(:bob) |
|
209 |
+ @checker.save! |
|
210 |
+ end |
|
181 | 211 |
|
182 |
- checker.check |
|
183 |
- event = Event.last |
|
184 |
- event.payload['response']['version'].should == 2 |
|
185 |
- event.payload['response']['title'].should == "hello!" |
|
212 |
+ describe "#check" do |
|
213 |
+ it "should check for changes" do |
|
214 |
+ lambda { @checker.check }.should change { Event.count }.by(1) |
|
215 |
+ lambda { @checker.check }.should_not change { Event.count } |
|
186 | 216 |
end |
187 | 217 |
end |
188 | 218 |
end |
219 |
+ |
|
189 | 220 |
end |